Setup

# Import functions
root = "./"
source(file.path(root,"general_functions.R"))
import_parameters(params)
## **** __Utilized Cores__ **** = 2$subsetGenes
## [1] "protein_coding"
## 
## $subsetCells
## [1] 500
## 
## $resolution
## [1] 0.6
## 
## $resultsPath
## [1] "./Results"
## 
## $nCores
## [1] 2
## 
## $perplexity
## [1] 30
# load(file.path(resultsPath, "scRNAseq_results.RData"))
# resultsPath <- "Results/subsetGenes-protein_coding__subsetCells-F__Resolution-0.2__perplexity-40__nCores-4"
 
library(Seurat)
library(cowplot)
library(ggplot2)
library(dplyr) 
library(data.table) 
library(readxl) 
library(reshape2)
library(ggrepel)
library(plotly)

library(GeneOverlap) # BiocManager::install("GeneOverlap") 
library(enrichR) #BiocManager::install("enrichR")

## Upgrade to alpha (development) version of Monocle, as this version has been optimized for large datasets
if("DDRTree" %in% rownames(installed.packages())==F){
  devtools::install_github("cole-trapnell-lab/DDRTree", ref="simple-ppt-like")
}
if("graph" %in% rownames(installed.packages())==F){
 devtools::install_github("cole-trapnell-lab/L1-graph") 
}
if("reticulate" %in% rownames(installed.packages())==F){
 install.packages("reticulate")
}
if("flexclust" %in% rownames(installed.packages())==F){
 install.packages("flexclust")
}
if("mcclust" %in% rownames(installed.packages())==F){
 install.packages("mcclust")
}
library(flexclust)
library(mcclust)
library(reticulate)
if(py_module_available("umap-learn")==F){
  reticulate::py_install('umap-learn')# , pip = T, pip_ignore_installed = T # Ensure the latest version of UMAP is installed 
}
## virtualenv: ~/.virtualenvs/r-reticulate 
## Installing packages ...
## 
## Installation complete.
if(py_module_available("louvain")==F){
  reticulate::py_install("louvain")
  system("pip install louvain --user")
}
if("monocle" %in% rownames(installed.packages())==F){
  devtools::install_github("cole-trapnell-lab/monocle-release", ref="monocle3_alpha") 
}
if("ggrastr" %in% rownames(installed.packages())==F){
  devtools::install_github("VPetukhov/ggrastr")
}

library(monocle) # BiocManager::install("monocle") 
# BiocManager::install(c('DelayedArray', 'DelayedMatrixStats', 'org.Hs.eg.db', 'org.Mm.eg.db'))
library(org.Hs.eg.db)
library(garnett) # devtools::install_github("cole-trapnell-lab/garnett")
 
#  
if(getwd()=="/Users/schilder/Desktop/PD_scRNAseq"){
  allGenes <- F
}else{allGenes <- T}
allGenes
## [1] FALSE
sessioninfo::session_info()
## ─ Session info ──────────────────────────────────────────────────────────
##  setting  value                       
##  version  R version 3.5.1 (2018-07-02)
##  os       macOS  10.14.3              
##  system   x86_64, darwin15.6.0        
##  ui       X11                         
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  ctype    en_US.UTF-8                 
##  tz       America/New_York            
##  date     2019-03-05                  
## 
## ─ Packages ──────────────────────────────────────────────────────────────
##  package          * version    date       lib
##  acepack            1.4.1      2016-10-29 [2]
##  AnnotationDbi    * 1.44.0     2018-10-30 [2]
##  ape                5.2        2018-09-24 [2]
##  assertthat         0.2.0      2017-04-11 [2]
##  backports          1.1.3      2018-12-14 [2]
##  base64enc          0.1-3      2015-07-28 [2]
##  bibtex             0.4.2      2017-06-30 [2]
##  Biobase          * 2.42.0     2018-10-30 [2]
##  BiocGenerics     * 0.28.0     2018-10-30 [2]
##  BiocParallel     * 1.16.6     2019-02-10 [2]
##  bit                1.1-14     2018-05-29 [2]
##  bit64              0.9-7      2017-05-08 [2]
##  bitops             1.0-6      2013-08-17 [2]
##  blob               1.1.1      2018-03-25 [2]
##  boot               1.3-20     2017-08-06 [2]
##  caTools            1.17.1.1   2018-07-20 [2]
##  cellranger         1.1.0      2016-07-27 [2]
##  checkmate          1.9.1      2019-01-15 [2]
##  class              7.3-15     2019-01-01 [2]
##  classInt           0.3-1      2018-12-18 [1]
##  cli                1.0.1      2018-09-25 [2]
##  cluster            2.0.7-1    2018-04-13 [2]
##  coda               0.19-2     2018-10-08 [2]
##  codetools          0.2-16     2018-12-24 [2]
##  colorspace         1.4-0      2019-01-13 [2]
##  cowplot          * 0.9.4      2019-01-08 [2]
##  crayon             1.3.4      2017-09-16 [2]
##  crosstalk          1.0.0      2016-12-21 [2]
##  data.table       * 1.12.0     2019-01-13 [2]
##  DBI                1.0.0      2018-05-02 [2]
##  DDRTree          * 0.1.5      2019-02-22 [1]
##  DelayedArray     * 0.8.0      2018-10-30 [2]
##  deldir             0.1-16     2019-01-04 [1]
##  densityClust       0.3        2017-10-24 [2]
##  DEoptimR           1.0-8      2016-11-19 [2]
##  digest             0.6.18     2018-10-10 [2]
##  diptest            0.75-7     2016-12-05 [2]
##  docopt             0.6.1      2018-10-11 [2]
##  doParallel         1.0.14     2018-09-24 [2]
##  doSNOW             1.0.16     2017-12-13 [2]
##  dplyr            * 0.8.0.1    2019-02-15 [2]
##  dtw                1.20-1     2018-05-18 [2]
##  e1071              1.7-0.1    2019-01-21 [1]
##  enrichR          * 1.0        2017-04-02 [2]
##  evaluate           0.13       2019-02-12 [2]
##  expm               0.999-3    2018-09-22 [2]
##  fastICA            1.2-1      2017-06-12 [2]
##  fitdistrplus       1.0-14     2019-01-23 [2]
##  flexclust        * 1.4-0      2018-09-24 [1]
##  flexmix            2.3-15     2019-02-18 [2]
##  FNN                1.1.3      2019-02-15 [2]
##  foreach            1.4.4      2017-12-12 [2]
##  foreign            0.8-71     2018-07-20 [2]
##  Formula            1.2-3      2018-05-03 [2]
##  fpc                2.1-11.1   2018-07-20 [2]
##  future             1.11.1.1   2019-01-26 [2]
##  garnett          * 0.1.3      2019-02-15 [2]
##  gbRd               0.4-11     2012-10-01 [2]
##  gdata              2.18.0     2017-06-06 [2]
##  GeneOverlap      * 1.18.0     2018-10-30 [1]
##  ggplot2          * 3.1.0      2018-10-25 [2]
##  ggrepel          * 0.8.0      2018-05-09 [2]
##  ggridges           0.5.1      2018-09-27 [2]
##  glmnet             2.0-16     2018-04-02 [2]
##  globals            0.12.4     2018-10-11 [2]
##  glue               1.3.0      2018-07-17 [2]
##  gmodels            2.18.1     2018-06-25 [1]
##  gplots             3.0.1.1    2019-01-27 [2]
##  gridExtra          2.3        2017-09-09 [2]
##  gtable             0.2.0      2016-02-26 [2]
##  gtools             3.8.1      2018-06-26 [2]
##  hdf5r              1.0.1      2018-10-07 [2]
##  Hmisc              4.2-0      2019-01-26 [2]
##  HSMMSingleCell     1.2.0      2018-11-01 [2]
##  htmlTable          1.13.1     2019-01-07 [2]
##  htmltools          0.3.6      2017-04-28 [2]
##  htmlwidgets        1.3        2018-09-30 [2]
##  httpuv             1.4.5.1    2018-12-18 [2]
##  httr               1.4.0      2018-12-11 [2]
##  ica                1.0-2      2018-05-24 [2]
##  igraph           * 1.2.4      2019-02-13 [2]
##  IRanges          * 2.16.0     2018-10-30 [2]
##  irlba            * 2.3.3      2019-02-05 [2]
##  iterators          1.0.10     2018-07-13 [2]
##  jsonlite           1.6        2018-12-07 [2]
##  kernlab            0.9-27     2018-08-10 [2]
##  KernSmooth         2.23-15    2015-06-29 [2]
##  knitr              1.21       2018-12-10 [2]
##  L1Graph          * 0.1.1      2019-02-22 [1]
##  lars               1.2        2013-04-24 [2]
##  later              0.8.0      2019-02-11 [2]
##  lattice          * 0.20-38    2018-11-04 [2]
##  latticeExtra       0.6-28     2016-02-09 [2]
##  lazyeval           0.2.1      2017-10-29 [2]
##  LearnBayes         2.15.1     2018-03-18 [1]
##  limma              3.38.3     2018-12-02 [2]
##  listenv            0.7.0      2018-01-21 [2]
##  lmtest             0.9-36     2018-04-04 [2]
##  lpSolve          * 5.6.13     2015-09-19 [2]
##  lpSolveAPI       * 5.5.2.0-17 2016-01-13 [1]
##  lsei               1.2-0      2017-10-23 [2]
##  magrittr           1.5        2014-11-22 [2]
##  manipulateWidget   0.10.0     2018-06-11 [2]
##  MASS               7.3-51.1   2018-11-01 [2]
##  Matrix           * 1.2-15     2018-11-01 [2]
##  matrixStats      * 0.54.0     2018-07-23 [2]
##  mcclust          * 1.0        2012-07-23 [1]
##  mclust             5.4.2      2018-11-17 [2]
##  memoise            1.1.0      2017-04-21 [2]
##  metap              1.1        2019-02-06 [2]
##  mime               0.6        2018-10-05 [2]
##  miniUI             0.1.1.1    2018-05-18 [2]
##  mixtools           1.1.0      2017-03-10 [2]
##  modeltools       * 0.2-22     2018-07-16 [2]
##  monocle          * 2.99.3     2019-02-22 [1]
##  munsell            0.5.0      2018-06-12 [2]
##  mvtnorm            1.0-8      2018-05-31 [2]
##  nlme               3.1-137    2018-04-07 [2]
##  nnet               7.3-12     2016-02-02 [2]
##  npsurv             0.4-0      2017-10-14 [2]
##  org.Hs.eg.db     * 3.7.0      2019-02-15 [2]
##  pbapply            1.4-0      2019-02-05 [2]
##  pbmcapply          1.3.1      2019-01-14 [1]
##  pheatmap           1.0.12     2019-01-04 [2]
##  pillar             1.3.1      2018-12-15 [2]
##  pkgconfig          2.0.2      2018-08-16 [2]
##  plotly           * 4.8.0      2018-07-20 [2]
##  plyr               1.8.4      2016-06-08 [2]
##  png                0.1-7      2013-12-03 [2]
##  prabclus           2.2-7      2019-01-17 [2]
##  promises           1.0.1      2018-04-13 [2]
##  proxy              0.4-22     2018-04-08 [2]
##  purrr              0.3.0      2019-01-27 [2]
##  qlcMatrix          0.9.7      2018-04-20 [2]
##  R.methodsS3        1.7.1      2016-02-16 [2]
##  R.oo               1.22.0     2018-04-22 [2]
##  R.utils            2.8.0      2019-02-14 [2]
##  R6                 2.4.0      2019-02-14 [2]
##  RANN               2.6.1      2019-01-08 [2]
##  RColorBrewer       1.1-2      2014-12-07 [2]
##  Rcpp               1.0.0      2018-11-07 [2]
##  Rdpack             0.10-1     2018-10-04 [2]
##  readxl           * 1.3.0      2019-02-15 [2]
##  reshape2         * 1.4.3      2017-12-11 [2]
##  reticulate       * 1.10       2018-08-05 [2]
##  rgl                0.99.16    2018-03-28 [2]
##  rjson              0.2.20     2018-06-08 [2]
##  rlang              0.3.1      2019-01-08 [2]
##  rmarkdown          1.11       2018-12-08 [2]
##  robustbase         0.93-3     2018-09-21 [2]
##  ROCR               1.0-7      2015-03-26 [2]
##  rpart              4.1-13     2018-02-23 [2]
##  RSQLite            2.1.1      2018-05-06 [2]
##  rstudioapi         0.9.0      2019-01-09 [2]
##  Rtsne              0.15       2018-11-10 [2]
##  S4Vectors        * 0.20.1     2018-11-09 [2]
##  scales             1.0.0      2018-08-09 [2]
##  SDMTools           1.1-221    2014-08-05 [2]
##  segmented          0.5-3.0    2017-11-30 [2]
##  sessioninfo        1.1.1      2018-11-05 [2]
##  Seurat           * 2.3.4      2018-07-17 [2]
##  sf                 0.7-3      2019-02-21 [1]
##  shiny              1.2.0      2018-11-02 [2]
##  slam               0.1-44     2018-12-21 [2]
##  snow               0.4-3      2018-09-14 [2]
##  sp                 1.3-1      2018-06-05 [1]
##  sparsesvd          0.1-4      2018-02-15 [2]
##  spData             0.3.0      2019-01-07 [1]
##  spdep              1.0-2      2019-02-13 [1]
##  stringi            1.3.1      2019-02-13 [2]
##  stringr            1.4.0      2019-02-10 [2]
##  survival           2.43-3     2018-11-26 [2]
##  tibble             2.0.1      2019-01-12 [2]
##  tidyr              0.8.2      2018-10-28 [2]
##  tidyselect         0.2.5      2018-10-11 [2]
##  trimcluster        0.1-2.1    2018-07-20 [2]
##  tsne               0.1-3      2016-07-15 [2]
##  units              0.6-2      2018-12-05 [1]
##  VGAM               1.1-1      2019-02-18 [1]
##  viridis            0.5.1      2018-03-29 [2]
##  viridisLite        0.3.0      2018-02-01 [2]
##  webshot            0.5.1      2018-09-28 [2]
##  withr              2.1.2      2018-03-15 [2]
##  xfun               0.5        2019-02-20 [1]
##  xtable             1.8-3      2018-08-29 [2]
##  yaml               2.2.0      2018-07-25 [2]
##  zoo                1.8-4      2018-09-19 [2]
##  source                                            
##  CRAN (R 3.5.0)                                    
##  Bioconductor                                      
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  Bioconductor                                      
##  Bioconductor                                      
##  Bioconductor                                      
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  Github (cole-trapnell-lab/DDRTree@43f3dc3)        
##  Bioconductor                                      
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  Github (cole-trapnell-lab/garnett@d04a89b)        
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  Bioconductor                                      
##  Github (cran/ggplot2@13ca86f)                     
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  Bioconductor                                      
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  Bioconductor                                      
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.1)                                    
##  Github (cole-trapnell-lab/L1-graph@c2a3f26)       
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  Bioconductor                                      
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  Github (cole-trapnell-lab/monocle-release@4d37597)
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.0)                                    
##  Bioconductor                                      
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.0)                                    
##  Bioconductor                                      
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
## 
## [1] /Users/schilder/Library/R/3.5/library
## [2] /Library/Frameworks/R.framework/Versions/3.5/Resources/library

Monocle

Convert and Normalize

  • Convert from Seurat object to CDS object and re-normalize the data from scratch (importCDS currently only converts the raw.data, not the scale.data).
  • Alternatively, you can do your normalization in Seurat and then construct a new CDS object manually with the scale.data from the Seurat object.
# Import Seurat obj
load(file.path(root, "Data/seurat_object_add_HTO_ids.Rdata"))
DAT <- seurat.obj  
rm(seurat.obj)
# Add metadata
metadata <- read.table(file.path(root,"Data/meta.data4.tsv"))
DAT <- AddMetaData(object = DAT, metadata = metadata)  
protDAT <- subsetBiotypes(DAT, subsetGenes = "protein_coding")
## Subsetting genes: protein_coding 
## 14827 / 24914 genes are protein_coding
protDAT <- remove_nonmatched_metadata(protDAT, subsetCells = F)
protDAT <- FindVariableGenes(object = protDAT, mean.function = ExpMean, 
                         dispersion.function = LogVMR, 
                         selection.method ="dispersion", do.plot = T, 
                         top.genes = 2000)

# Pass TRUE if you want to see progress output on some of Monocle 3's operations
DelayedArray:::set_verbose_block_processing(TRUE)
## [1] FALSE
# Passing a higher value will make some computations faster but use more memory. Adjust with caution!
options(DelayedArray.block.size=1000e6)

## Construct CDS object manually
# mDAT <- newCellDataSet(cellData = DAT@scale.data,
#                        featureData = AnnotatedDataFrame( data.frame(gene_short_name=row.names(DAT@scale.data),
#                                                                     row.names = row.names(DAT@scale.data))
#                          ),
#                        phenoData = AnnotatedDataFrame(DAT@meta.data))
# mDAT <- estimateSizeFactors(mDAT) #DESeq2?
# mDAT <- preprocessCDS(mDAT, method = "PCA", num_dim = 10, norm_method = "none")


## Construct CDS object automaticaly
### NOTE!: importCDS takes only the raw.data (not scale.data)
mDAT <- monocle::importCDS(protDAT,  import_all = T)
mDAT <- estimateSizeFactors(mDAT) #DESeq2?
mDAT <- estimateDispersions(mDAT)
# Regressing out ID will also regress out mutation status 
mDAT <- preprocessCDS(mDAT, num_dim = 20, residualModelFormulaStr = "~ nUMI + percent.mito") 

Dimensionality Reduction

# !Important! (allows replicability)
set.seed(1) # Monocle sets their seed to 2016 by default (Seurat sets it to 1)
# Options: c("UMAP", "tSNE", "DDRTree", "ICA", "none")

## UMAP
# monocle:::UMAP( )
mDAT <- reduceDimension(mDAT, reduction_method = 'UMAP', 
                        max_components = 3, 
                        metric="cosine",
                        n_threads =  parallel::detectCores())
## t-SNE
# mDAT <- reduceDimension(mDAT, reduction_method = 'tSNE', 
#                         perplexity = 30,
#                         scaling = F,
#                         num_threads = parallel::detectCores()) 

pretty_colors <- function(mDAT, var="Cluster", palette="custom"){
  unique_var <- unique(as.character(pData(mDAT)[var][,1]))
  if(palette=="custom"){
    col_vector_origin <- c("mediumorchid","deepskyblue","limegreen","steelblue",
                           "hotpink","turquoise", "blueviolet","mediumvioletred",
                           
                           "#db83da","#53c35d","#a546bb","#718fe8","#a469e6",
                         "#babb3d","#4f66dc","#e68821","#83b837","#d6ac3e",
                         "#7957b4","#468e36","#d347ae","#5dbf8c","#e53e76",
                         "#42c9b8","#dd454a","#3bbac6","#d5542c","#59aadc",
                         "#cf8b36","#4a61b0","#8b8927","#a24e99","#9cb36a",
                         "#ca3e87","#36815b","#b23c4e","#5c702c","#b79add",
                         "#a55620","#5076af","#e38f67","#85609c","#caa569",
                         "#9b466c","#88692c","#dd81a9","#a35545","#e08083",
                         "#17becf","#9edae5")
  } else{col_vector_origin <- RColorBrewer::brewer.pal(length(unique_var),palette)} 
  # barplot(1:length(col_vector_origin) , col=col_vector_origin) 
  col_vector <- col_vector_origin[1:length(unique_var)]
  names(col_vector) <- unique_var
  return(col_vector)
} 

Clustering

  • “Here res represents the resolution parameter (range from 0-1) for the louvain clustering. Values between 0 and 1e-2 are good, bigger values give you more clusters. Default is set to be 1e-4. louvain_iter represents the number of iterations used for Louvain clustering. The clustering result gives the largest modularity score that will be used as the final clustering result. The default is 1.”

Test Clustering Hyperparameters

plotDR <- function(resDAT, metavar="Cluster", title=""){ 
  # options(repr.plot.width = 11)
  # options(repr.plot.height = 8)
  col_vector <- pretty_colors(resDAT, var=metavar) 
  p <- plot_cell_clusters(resDAT,
                     color_by = metavar,
                     cell_size = 0.8,
                     show_group_id = T)  + 
    scale_color_manual(values = col_vector) +
    theme(legend.text=element_text(size=6)) +
    theme(legend.position="right")+ 
    labs(title = paste(title)) 
  print(p)
}

test_hyperparameters <- function(mDAT, resolutions=seq(0, 1e-4,length.out=6),
                                 Ks=seq(10,60,length.out=6), 
                                 iter_var="k"){
  if(iter_var=="resolution"){
    for(res in resolutions){
      res_title <- paste("Resolution =",res)
      cat("\n")
      cat("####",res_title,"\n")
      try({ 
       resDAT <- clusterCells(mDAT, res=res,# k=43,
                          method = "louvain",# densityPeak
                          louvain_iter = 1,
                          verbose = F, 
                          clustering_genes = clustering_genes,
                          cores = parallel::detectCores())
      plotDR(resDAT, title=res_title)  
      }) 
      cat("\n")
    }  
  }else{
    for(k in Ks){
      k_title <- paste("K =",k)
      cat("\n")
      cat("###",k_title,"\n")
      try({ 
       kDAT <- clusterCells(mDAT, res=8.888889e-05, k=k,
                          method = "louvain",# densityPeak
                          louvain_iter = 1,
                          verbose = F, 
                          clustering_genes = clustering_genes,
                          cores = parallel::detectCores())
      plotDR(kDAT, title=k_title)  
      })
      cat("\n")
    }   
  } 
}
test_hyperparameters(mDAT, iter_var="resolution")

Resolution = 0

## Loading required package: ggrastr

Resolution = 2e-05

Resolution = 4e-05

Resolution = 6e-05

Resolution = 8e-05

Resolution = 1e-04

# test_hyperparameters(mDAT, iter_var="k")

Final Clustering Selection

# Select final resolution
## Use ONLY the 1000 most variable genes to do clustering (otherwise there's too much noise)
clustering_genes <- intersect(mDAT@featureData@data$gene_short_name, protDAT@var.genes)[1:1000]
louvain_res <- 1e-04 # 1e-04 is the default
mDAT <- clusterCells(mDAT, res=louvain_res,
                        method = "louvain",
                        louvain_iter = 1, 
                        clustering_genes = clustering_genes,
                        verbose = T, cores = parallel::detectCores())
## Run kNN based graph clustering starts:
##   -Input data of 19144 rows and 3 columns
##   -k is set to 20
##   Finding nearest neighbors...DONE ~ 0.172 s
##   Compute jaccard coefficient between nearest-neighbor sets ...DONE ~ 0.008 s
##   Build undirected graph from the weighted links ...DONE ~ 0.125 s
##   Run louvain clustering on the graph ...
## Running louvain iteration  1 ...
## Current iteration is 1; current resolution is 1e-04; Modularity is 0.246722458537894; Number of clusters are 6
## Maximal modularity is 0.246722458537894; corresponding resolution is 1e-04
## 
## Run kNN based graph clustering DONE, totally takes 3.07938814163208 s.
##   -Number of clusters: 6
# mDAT <- clusterCells(mDAT, num_clusters = 4,
#                         method = "densityPeak",    
#                         verbose = F,  
#                         cores = parallel::detectCores())
plotDR(mDAT, "Cluster")

plot_cell_clusters(mDAT, color_by = "dx", cell_size = .8)

plot_cell_clusters(mDAT, color_by = "mut", cell_size = .8) 

# CLUSTERING FROM SEURAT
# plotDR(mDAT, "post_clustering")

Identify Cell Types with Garnett

# generate size factors for normalization later
# Get pre-trained PBMC classifer 
load(file.path(root, "Data/Garnett/hsPBMC")) # Download from: https://cole-trapnell-lab.github.io/garnett/classifiers/hsPBMC  

mDAT <- garnett::classify_cells(mDAT, 
                           classifier = hsPBMC,
                           db = org.Hs.eg.db,
                           cluster_extend = TRUE,
                           cds_gene_id_type = "SYMBOL")
## Processing block 1/22 ... OK
## Processing block 2/22 ... OK
## Processing block 3/22 ... OK
## Processing block 4/22 ... OK
## Processing block 5/22 ... OK
## Processing block 6/22 ... OK
## Processing block 7/22 ... OK
## Processing block 8/22 ... OK
## Processing block 9/22 ... OK
## Processing block 10/22 ... OK
## Processing block 11/22 ... OK
## Processing block 12/22 ... OK
## Processing block 13/22 ... OK
## Processing block 14/22 ... OK
## Processing block 15/22 ... OK
## Processing block 16/22 ... OK
## Processing block 17/22 ... OK
## Processing block 18/22 ... OK
## Processing block 19/22 ... OK
## Processing block 20/22 ... OK
## Processing block 21/22 ... OK
## Processing block 22/22 ... OK
## Warning in doTryCatch(return(expr), name, parentenv, handler): The
## following genes used in the classifier are not present in the input CDS.
## Interpret with caution. ENSG00000174059The following genes used in the
## classifier are not present in the input CDS. Interpret with caution.
## ENSG00000007062The following genes used in the classifier are not present
## in the input CDS. Interpret with caution. ENSG00000157404The following
## genes used in the classifier are not present in the input CDS. Interpret
## with caution. ENSG00000185291
table(pData(mDAT)$cell_type)
## 
##         B cells           CD34+     CD4 T cells     CD8 T cells 
##              13              92               3               1 
## Dendritic cells       Monocytes        NK cells         T cells 
##             282           15029             804              17 
##         Unknown 
##            2903
cell_summary <- table(pData(mDAT)$cluster_ext_type) 
cell_summary
## 
##         B cells           CD34+     CD4 T cells     CD8 T cells 
##              13              13               3               1 
## Dendritic cells       Monocytes        NK cells         T cells 
##             145           17432             688               9 
##         Unknown 
##             840
# Get feature genes for each cell type
feature_genes <- garnett::get_feature_genes(classifier = hsPBMC,
                                   node = "root",
                                   db = org.Hs.eg.db,
                                   convert_ids = F)
head(feature_genes)  
##                      B cells       CD34+ Dendritic cells     Monocytes
## (Intercept)     -0.884989922  0.47694427    -0.124629734 -0.0301907756
## ENSG00000196154 -0.005499815 -0.01088859    -0.007978714  0.0056365451
## ENSG00000019582  0.030146634 -0.01959215     0.015232191  0.0001883507
## ENSG00000156738  2.193353690 -0.66191294    -1.104899464 -0.7393680112
## ENSG00000177455  2.056641938 -0.71851810    -1.941859710 -1.0448682467
## ENSG00000105369  1.800296145 -0.53835614     0.063380804 -0.1973728377
##                     NK cells      T cells     Unknown
## (Intercept)     -0.911657164  0.524757370 0.949765956
## ENSG00000196154 -0.001627233 -0.005835858 0.026193667
## ENSG00000019582 -0.005880096 -0.023473024 0.003378099
## ENSG00000156738 -0.096566144 -0.602417257 1.011810124
## ENSG00000177455  0.297279508  0.177790907 1.173533704
## ENSG00000105369 -0.618567693 -0.963141845 0.453761563
# If a cell type is called less than n times, re-categorize it as unknown
mDAT$cluster_ext_type_filt <- ifelse(mDAT$cluster_ext_type %in% names(cell_summary[cell_summary<50]), 
                                     "Unknown", mDAT$cluster_ext_type)

plot_cell_clusters(mDAT, color_by ="cell_type", cell_size = .8) +  facet_wrap(~dx)

plot_cell_clusters(mDAT, color_by ="cluster_ext_type", cell_size = .8)  +  facet_wrap(~dx)

plot_cell_clusters(mDAT, color_by ="cluster_ext_type_filt", cell_size = .8) +  facet_wrap(~dx)

Cell Type Counts

cell_proportions_plot <- function(mDAT, metavar){ 
  data <- pData(mDAT)
  cell_tally <- data %>% group_by(eval(parse(text = metavar)), cluster_ext_type_filt) %>% tally()
  colnames(cell_tally)[1] <- metavar
  createDT(cell_tally) 
  p <- ggplot(data=cell_tally, aes(x=eval(parse(text = metavar)), y=n, fill=cluster_ext_type_filt)) + 
    geom_col() + labs(title=paste("Cell Types by",metavar), x=metavar, y="Cell Count") +
  scale_fill_discrete(name = "Cell Type")
  print(p)
} 

for(var in c("dx","mut","Cluster")){
  cat("\n")
  cat("####", var)
  cell_proportions_plot(mDAT, var)
  cat("\n")
}

dx

mut

Cluster

Gene Expression

Monocyte Markers

plot_cell_clusters(mDAT, markers = c("CD14","FCGR3A"), cell_size = 0.8)
## Warning: Using alpha for a discrete variable is not advised.

PD Genes

plot_cell_clusters(mDAT, markers = c("LRRK2","GBA"), cell_size = 0.8)
## Warning: Using alpha for a discrete variable is not advised.

Selected Genes

plot_cell_clusters(mDAT, markers = c("MS4A4A","MS4A6A"), cell_size = 0.8) 
## Warning: Using alpha for a discrete variable is not advised.

Monocyte Count vs. LRRK2 Expression

mDAT_sub <- mDAT[row.names(subset(fData(mDAT), gene_short_name %in% c("LRRK2"))),] 

exp_dat <- exprs(mDAT_sub) %>% t() %>% as.matrix() %>% data.table(keep.rownames = "Cell", key = "Cell")
meta_dat <- pData(mDAT) %>% data.table( keep.rownames = "Cell", key = "Cell")
gene_dat <- exp_dat[meta_dat] %>% subset(cluster_ext_type_filt=="Monocytes") %>%  
  add_count(ID, cluster_ext_type_filt, name = "cell_count") 

ggplot(gene_dat, aes(x=cell_count, y=LRRK2, fill=ID, color=ID)) + geom_point() 

Heatmap

plot_markers_cluster(mDAT, markers = protDAT@var.genes[1:50], minimal_cluster_fraction = 0.05)

Pseudotime

# subsetCDS(mDAT, )
mDAT <- partitionCells(mDAT)
mDAT <- learnGraph(mDAT, RGE_method = 'SimplePPT',
                   n_threads =  parallel::detectCores())
## Warning: Deprecated, use tibble::rownames_to_column() instead.

## Warning: Deprecated, use tibble::rownames_to_column() instead.

## Warning: Deprecated, use tibble::rownames_to_column() instead.
plot_cell_trajectory(mDAT, color_by = "Cluster", cell_size = .01) 

plot_cell_trajectory(mDAT, color_by = "dx", cell_size = .01) 

plot_cell_trajectory(mDAT, color_by = "mut", cell_size = .01) 

plot_cell_trajectory(mDAT, color_by = "cluster_ext_type_filt", cell_size = .01) 

# dir.create(file.path(resultsPath, "pseudotime"), showWarnings = F)
# plot_3d_cell_trajectory(mDAT,
#                         color_by="cell_type",
#                         webGL_filename= file.path(resultsPath, "pseudotime/pseudotime_cellType.html"),
#                         show_backbone=TRUE,
#                         useNULL_GLdev=TRUE)
# plot_3d_cell_trajectory(mDAT, markers = c('FCGR3A'),
#                         webGL_filename=file.path(resultsPath, "pseudotime/pseudotime_FCGR3A.html"),
#                         show_backbone=TRUE,
#                         useNULL_GLdev=TRUE)

Differential Expression: Monocle

  • Tutorial
  • qval = FDR *“The first of the two models is called the full model. This model is essentially a way of predicting the expression value of each gene in a given cell knowing only whether that cell is a fibroblast or a myoblast. The second model, called the reduced model, does the same thing, but it doesn’t know the CellType for each cell. It has to come up with a reasonable prediction of the expression value for the gene that will be used for all the cells…The question Monocle must answer for each gene is how much better the full model’s prediction is than the reduced model’s.”
mDAT_sub <- mDAT[protDAT@var.genes[1:50], mDAT@phenoData$Cluster %in% c(1,2)]

# At ~1min/100 genes, this DGE method will take ~2.5hours to run for 14827 genes 
DEG_df <- differentialGeneTest(mDAT_sub, verbose = T,
                               fullModelFormulaStr = "~Cluster",
                               cores = parallel::detectCores())

# Plot
mDAT_sub_genes <- mDAT_sub[row.names(DEG_df)[1:4],]
plot_genes_jitter(mDAT_sub_genes, grouping = "Cluster", ncol= 4, color_by = "dx", plot_trend = T)

Save Results

save.image(file.path(resultsPath, "scRNAseq_results.RData"))